****APPENDIX TABLE F1****

// All CSV exports below use relative file names, so they are written into this folder.
// NOTE(review): the global main_dir is not defined in this file; presumably set by a master do-file -- confirm.
cd "$main_dir/output/tables/appendix_f"

*********************************************************************************************************************************************************************
*********************************************************************************************************************************************************************
*COLUMNS 1-2 - GLOBAL SAMPLE (HARMONIZED DATASET)********************************************************************************************************************
*********************************************************************************************************************************************************************
*********************************************************************************************************************************************************************

// Controls absorbed (together with the other fixed effects) in the Column 1-2 regressions.
global basic 	sex marital religion edu

//--------------------------------------------------------
//Read in relevant datasets
//--------------------------------------------------------

clear
use "$data_dir/clean/harmonized_dataset.dta"

// Bring in total_growth_rate_obs and yob_ln_gdppc, matched on ccode x survey_year x yob.
merge m:1 ccode survey_year yob using "$data_dir/raw/lifeexp_variables_data/growth_rate_mpd_var_exp.dta", ///
	nogen keepusing(total_growth_rate_obs yob_ln_gdppc)

// Bring in the eleven wgrowth_rate_* variables used as the lambda grid below.
// Each variable is requested exactly once (a repeated wgrowth_rate_neg0_5 entry was removed).
merge m:1 ccode survey_year yob using "$data_dir/raw/lifeexp_variables_data/growth_mpd_var_lambda_exp.dta", ///
	nogen keepusing(wgrowth_rate_neg2 wgrowth_rate_neg1 wgrowth_rate_neg0_8 ///
					wgrowth_rate_neg0_5 wgrowth_rate_neg0_2 ///
					wgrowth_rate_0 wgrowth_rate_0_2 wgrowth_rate_0_5 ///
					wgrowth_rate_0_8 wgrowth_rate_1 wgrowth_rate_2)

// Group identifiers used as absorbed fixed effects:
//   subregion_yob   - subregion x birth year (left missing when yob is coded -99)
//   svy_survey_year - survey_year x survey
fegen subregion_yob = group(subregion yob) if yob!=-99
fegen svy_survey_year = group(survey_year survey)

// Estimation-sample flags (1 or missing).
//   sample        - born_country != 0 (this also admits missing born_country) and
//                   total_growth_rate_obs non-missing and at least 0.9*(age-1).
//   gallup_sample - same growth-coverage rule restricted to survey=="Gallup" with
//                   born_country==1; for survey_year 2006 the birth-country rule is
//                   relaxed to born_country != 0.
cap drop sample gallup_sample
gen sample = 1 if born_country!=0 & total_growth_rate_obs>=0.9*(age-1) & total_growth_rate_obs!=.
gen gallup_sample = 1 if born_country==1 & total_growth_rate_obs>=0.9*(age-1) & total_growth_rate_obs!=. & survey=="Gallup"
replace gallup_sample = 1 if total_growth_rate_obs>=0.9*(age-1) & total_growth_rate_obs!=. & survey=="Gallup" & (born_country!=0) & survey_year==2006

//--------------------------------------------------------
//Create Table F1 (Column 1)
//--------------------------------------------------------

// Column 1: 5-fold cross-validated RMSE for each wgrowth_rate_* variable,
// full harmonized sample (sample==1), folds assigned at the ccode level.

// Step 1 - baseline fit; its e(sample) fixes the observations used in every fold.
capture drop kfold_sample ccid fold
quietly reghdfe trust_govt wgrowth_rate_1 yob_ln_gdppc if sample==1, ///
	absorb(ccode svy_survey_year subregion_yob age $basic) cluster(ccode)
generate kfold_sample = e(sample)
egen ccid = group(ccode)

// Step 2 - one uniform draw per ccid in the estimation sample, cut into 5 groups.
preserve
	keep if kfold_sample==1
	duplicates drop ccid, force
	set seed 123456
	generate double u = runiform()
	egen fold = cut(u), group(5)
	// cut(), group() numbers the groups from 0; shift to 1..5.
	replace fold = fold + 1
	keep ccid fold
	tempfile folds
	save `folds', replace
restore

// Step 3 - attach the fold id to every observation; blank it outside the estimation sample.
merge m:1 ccid using `folds', nogen
replace fold = . if kfold_sample!=1

// Step 4 - result container: one row per grid value, filled in the loop order below.
matrix grid = J(11, 1, .)
matrix colnames grid = avg_rmse
matrix rownames grid = "-2" "-1" "-0.8" "-0.5" "-0.2" "0" "0.2" "0.5"  "0.8" "1" "2"

capture drop train valid yhat se
local row = 0
local lambda_vars wgrowth_rate_neg2 wgrowth_rate_neg1 wgrowth_rate_neg0_8 wgrowth_rate_neg0_5 wgrowth_rate_neg0_2 wgrowth_rate_0 wgrowth_rate_0_2 wgrowth_rate_0_5 wgrowth_rate_0_8 wgrowth_rate_1 wgrowth_rate_2
foreach x of local lambda_vars {
	local ++row
	local rmse_sum = 0

	forvalues k = 1/5 {

		// Hold out fold k; observations with missing fold get train==1, valid==0.
		generate train = fold != `k'
		generate valid = fold == `k'

		quietly reghdfe trust_govt `x' yob_ln_gdppc if train==1 & kfold_sample==1, ///
			absorb(ccode svy_survey_year subregion_yob age $basic) cluster(ccode)

		// Prediction uses only the slope on the growth variable.
		generate yhat = _b[`x'] * `x' if valid==1 & kfold_sample==1
		replace yhat = . if train==1

		// Squared error on the held-out fold, then its root mean.
		generate se = (trust_govt - yhat)^2 if valid==1 & kfold_sample==1
		quietly summarize se if valid==1 & kfold_sample==1
		local rmse = sqrt(r(mean))
		local rmse_sum = `rmse_sum' + `rmse'

		drop train valid yhat se

		display "Fold `k'/5 of `x' done"

	}

	// Average the five fold-level RMSEs and store them in the grid row for this variable.
	local avg_rmse = `rmse_sum' / 5
	matrix grid[`row', 1] = `avg_rmse'

}

matlist grid

// Step 5 - export the grid as a one-column CSV without disturbing the data in memory.
preserve
	svmat double grid, names(col)
	drop if avg_rmse==.
	keep avg_rmse
	export delimited using "table_f1_column_1.csv", replace
restore
	
//--------------------------------------------------------
//Create Table F1 (Column 2)
//--------------------------------------------------------

// Column 2: same 5-fold procedure as Column 1, restricted to gallup_sample==1
// and absorbing survey_year instead of svy_survey_year.

// Step 1 - baseline fit; its e(sample) fixes the observations used in every fold.
capture drop kfold_sample ccid fold
quietly reghdfe trust_govt wgrowth_rate_1 yob_ln_gdppc if gallup_sample==1, ///
	absorb(ccode survey_year subregion_yob age $basic) cluster(ccode)
generate kfold_sample = e(sample)
egen ccid = group(ccode)

// Step 2 - one uniform draw per ccid in the estimation sample, cut into 5 groups.
preserve
	keep if kfold_sample==1
	duplicates drop ccid, force
	set seed 123456
	generate double u = runiform()
	egen fold = cut(u), group(5)
	// cut(), group() numbers the groups from 0; shift to 1..5.
	replace fold = fold + 1
	keep ccid fold
	tempfile folds
	save `folds', replace
restore

// Step 3 - attach the fold id to every observation; blank it outside the estimation sample.
merge m:1 ccid using `folds', nogen
replace fold = . if kfold_sample!=1

// Step 4 - result container: one row per grid value, filled in the loop order below.
matrix grid = J(11, 1, .)
matrix colnames grid = avg_rmse
matrix rownames grid = "-2" "-1" "-0.8" "-0.5" "-0.2" "0" "0.2" "0.5"  "0.8" "1" "2"

capture drop train valid yhat se
local row = 0
local lambda_vars wgrowth_rate_neg2 wgrowth_rate_neg1 wgrowth_rate_neg0_8 wgrowth_rate_neg0_5 wgrowth_rate_neg0_2 wgrowth_rate_0 wgrowth_rate_0_2 wgrowth_rate_0_5 wgrowth_rate_0_8 wgrowth_rate_1 wgrowth_rate_2
foreach x of local lambda_vars {
	local ++row
	local rmse_sum = 0

	forvalues k = 1/5 {

		// Hold out fold k; observations with missing fold get train==1, valid==0.
		generate train = fold != `k'
		generate valid = fold == `k'

		quietly reghdfe trust_govt `x' yob_ln_gdppc if train==1 & kfold_sample==1, ///
			absorb(ccode survey_year subregion_yob age $basic) cluster(ccode)

		// Prediction uses only the slope on the growth variable.
		generate yhat = _b[`x'] * `x' if valid==1 & kfold_sample==1
		replace yhat = . if train==1

		// Squared error on the held-out fold, then its root mean.
		generate se = (trust_govt - yhat)^2 if valid==1 & kfold_sample==1
		quietly summarize se if valid==1 & kfold_sample==1
		local rmse = sqrt(r(mean))
		local rmse_sum = `rmse_sum' + `rmse'

		drop train valid yhat se

		display "Fold `k'/5 of `x' done"

	}

	// Average the five fold-level RMSEs and store them in the grid row for this variable.
	local avg_rmse = `rmse_sum' / 5
	matrix grid[`row', 1] = `avg_rmse'

}

matlist grid

// Step 5 - export. Not wrapped in preserve/restore: after this the data in memory
// hold only the avg_rmse column.
svmat double grid, names(col)
drop if avg_rmse==.
keep avg_rmse
export delimited using "table_f1_column_2.csv", replace


*****************************************************************************************************************************************************************
*****************************************************************************************************************************************************************
*COLUMNS 3-4 - US SAMPLE (ANES)**********************************************************************************************************************************
*****************************************************************************************************************************************************************
*****************************************************************************************************************************************************************

// Fixed-effect sets absorbed in the Column 3-4 regressions.
global basic "sex marital religion edu ethnicity birth_state"
global extra "employment_status occupation income urban"
global time  "survey_year age generation"
global polit "party partisan"

//--------------------------------------------------------
//Read in relevant datasets
//--------------------------------------------------------

// Replace whatever is in memory with the ANES file.
use "$data_dir/clean/anes_final.dta", clear

// state_code x survey_year group id (absorbed in Column 4).
egen scode_survey_year = group(state_code survey_year)

//--------------------------------------------------------
//Create Table F1 (Column 3)
//--------------------------------------------------------

// Column 3: 5-fold cross-validated RMSE for each wgrowth_rate_* variable on the
// ANES data, absorbing state_code; folds are assigned at the yob level.

// Step 1 - baseline fit; its e(sample) fixes the observations used in every fold.
capture drop ccid fold kfold_sample
quietly reghdfe trust_fed_govt wgrowth_rate_1 yob_ln_gdppc, ///
	absorb(state_code $time $basic $extra $polit) cluster(yob)
generate kfold_sample = e(sample)
egen ccid = group(yob)

// Step 2 - one uniform draw per ccid in the estimation sample, cut into 5 groups.
preserve
	keep if kfold_sample==1
	duplicates drop ccid, force
	set seed 123456
	generate double u = runiform()
	egen fold = cut(u), group(5)
	// cut(), group() numbers the groups from 0; shift to 1..5.
	replace fold = fold + 1
	keep ccid fold
	tempfile folds
	save `folds', replace
restore

// Step 3 - attach the fold id to every observation; blank it outside the estimation sample.
merge m:1 ccid using `folds', nogen
replace fold = . if kfold_sample!=1

// Step 4 - result container: one row per grid value, filled in the loop order below.
matrix grid = J(11, 1, .)
matrix colnames grid = avg_rmse
matrix rownames grid = "-2" "-1" "-0.8" "-0.5" "-0.2" "0" "0.2" "0.5" "0.8" "1" "2"

capture drop train valid yhat se
local row = 0
local lambda_vars wgrowth_rate_neg2 wgrowth_rate_neg1 wgrowth_rate_neg0_8 wgrowth_rate_neg0_5 wgrowth_rate_neg0_2 wgrowth_rate_0 wgrowth_rate_0_2 wgrowth_rate_0_5 wgrowth_rate_0_8 wgrowth_rate_1 wgrowth_rate_2
foreach x of local lambda_vars {
	local ++row
	local rmse_sum = 0

	forvalues k = 1/5 {

		// Hold out fold k; observations with missing fold get train==1, valid==0.
		generate train = fold != `k'
		generate valid = fold == `k'

		quietly reghdfe trust_fed_govt `x' yob_ln_gdppc if train==1 & kfold_sample==1, ///
			absorb(state_code $time $basic $extra $polit) cluster(yob)

		// Prediction uses only the slope on the growth variable.
		generate yhat = _b[`x'] * `x' if valid==1 & kfold_sample==1
		replace yhat = . if train==1

		// Squared error on the held-out fold, then its root mean.
		generate se = (trust_fed_govt - yhat)^2 if valid==1 & kfold_sample==1
		quietly summarize se if valid==1 & kfold_sample==1
		local rmse = sqrt(r(mean))
		local rmse_sum = `rmse_sum' + `rmse'

		drop train valid yhat se

		display "Fold `k'/5 of `x' done"

	}

	// Average the five fold-level RMSEs and store them in the grid row for this variable.
	local avg_rmse = `rmse_sum'/5
	matrix grid[`row', 1] = `avg_rmse'

}

matlist grid

// Step 5 - export the grid as a one-column CSV without disturbing the data in memory.
preserve
	svmat double grid, names(col)
	drop if avg_rmse==.
	keep avg_rmse
	export delimited using "table_f1_column_3.csv", replace
restore


//--------------------------------------------------------
//Create Table F1 (Column 4)
//--------------------------------------------------------

// Column 4: same 5-fold procedure as Column 3, absorbing scode_survey_year
// (state_code x survey_year) in place of state_code.

// Step 1 - baseline fit; its e(sample) fixes the observations used in every fold.
capture drop ccid fold kfold_sample
quietly reghdfe trust_fed_govt wgrowth_rate_1 yob_ln_gdppc, ///
	absorb(scode_survey_year $time $basic $extra $polit) cluster(yob)
generate kfold_sample = e(sample)
egen ccid = group(yob)

// Step 2 - one uniform draw per ccid in the estimation sample, cut into 5 groups.
preserve
	keep if kfold_sample==1
	duplicates drop ccid, force
	set seed 123456
	generate double u = runiform()
	egen fold = cut(u), group(5)
	// cut(), group() numbers the groups from 0; shift to 1..5.
	replace fold = fold + 1
	keep ccid fold
	tempfile folds
	save `folds', replace
restore

// Step 3 - attach the fold id to every observation; blank it outside the estimation sample.
merge m:1 ccid using `folds', nogen
replace fold = . if kfold_sample!=1

// Step 4 - result container: one row per grid value, filled in the loop order below.
matrix grid = J(11, 1, .)
matrix colnames grid = avg_rmse
matrix rownames grid = "-2" "-1" "-0.8" "-0.5" "-0.2" "0" "0.2" "0.5" "0.8" "1" "2"

capture drop train valid yhat se
local row = 0
local lambda_vars wgrowth_rate_neg2 wgrowth_rate_neg1 wgrowth_rate_neg0_8 wgrowth_rate_neg0_5 wgrowth_rate_neg0_2 wgrowth_rate_0 wgrowth_rate_0_2 wgrowth_rate_0_5 wgrowth_rate_0_8 wgrowth_rate_1 wgrowth_rate_2
foreach x of local lambda_vars {
	local ++row
	local rmse_sum = 0

	forvalues k = 1/5 {

		// Hold out fold k; observations with missing fold get train==1, valid==0.
		generate train = fold != `k'
		generate valid = fold == `k'

		quietly reghdfe trust_fed_govt `x' yob_ln_gdppc if train==1 & kfold_sample==1, ///
			absorb(scode_survey_year $time $basic $extra $polit) cluster(yob)

		// Prediction uses only the slope on the growth variable.
		generate yhat = _b[`x'] * `x' if valid==1 & kfold_sample==1
		replace yhat = . if train==1

		// Squared error on the held-out fold, then its root mean.
		generate se = (trust_fed_govt - yhat)^2 if valid==1 & kfold_sample==1
		quietly summarize se if valid==1 & kfold_sample==1
		local rmse = sqrt(r(mean))
		local rmse_sum = `rmse_sum' + `rmse'

		drop train valid yhat se

		display "Fold `k'/5 of `x' done"

	}

	// Average the five fold-level RMSEs and store them in the grid row for this variable.
	local avg_rmse = `rmse_sum'/5
	matrix grid[`row', 1] = `avg_rmse'

}

matlist grid

// Step 5 - export. Not wrapped in preserve/restore: after this the data in memory
// hold only the avg_rmse column.
svmat double grid, names(col)
drop if avg_rmse==.
keep avg_rmse
export delimited using "table_f1_column_4.csv", replace


*******************************************************************************************************************************************************************
*******************************************************************************************************************************************************************
*COLUMNS 5-6 - SWISS SAMPLE (SHP)**********************************************************************************************************************************
*******************************************************************************************************************************************************************
*******************************************************************************************************************************************************************

// Fixed-effect sets absorbed in the Column 5-6 regressions.
global basic  "marital edu"
global econ   "employment_status occupation income imputed_income"
global design "survey_year age language"
global polit  "ideology party_member"

//--------------------------------------------------------
//Read in relevant datasets
//--------------------------------------------------------

// Replace whatever is in memory with the SHP panel file.
use "$data_dir/clean/shp_final_panel.dta", clear

// Declare the panel structure: person id x survey year.
xtset idpers survey_year

// Count, per person, the waves with a non-missing trust_fed_govt_med.
generate dummy = 1 if trust_fed_govt_med!=.
bysort idpers: egen total_obs = total(dummy)

// Estimation-sample flag (1 or missing): live_switz==1, more than four
// non-missing outcome waves, and non-missing age of at least 15.
capture drop sample
generate sample = 1 if live_switz==1 & total_obs>4 & total_obs!=. & age>=15 & age!=.

//--------------------------------------------------------
//Create Table F1 (Column 5)
//--------------------------------------------------------

// Column 5: 5-fold cross-validated RMSE for each wgrowth_rate_* variable on the
// SHP panel (sample==1), absorbing idpers; folds are assigned at the yob level.

// Step 1 - baseline fit; its e(sample) fixes the observations used in every fold.
capture drop kfold_sample ccid fold
quietly reghdfe trust_fed_govt_med wgrowth_rate_1 if sample==1, ///
	absorb(canton $basic $design $econ $polit idpers) cluster(idhous yob)
generate kfold_sample = e(sample)
egen ccid = group(yob)

// Step 2 - one uniform draw per ccid in the estimation sample, cut into 5 groups.
preserve
	keep if kfold_sample==1
	duplicates drop ccid, force
	set seed 123456
	generate double u = runiform()
	egen fold = cut(u), group(5)
	// cut(), group() numbers the groups from 0; shift to 1..5.
	replace fold = fold + 1
	keep ccid fold
	tempfile folds
	save `folds', replace
restore

// Step 3 - attach the fold id to every observation; blank it outside the estimation sample.
merge m:1 ccid using `folds', nogen
replace fold = . if kfold_sample!=1

// Step 4 - result container: one row per grid value, filled in the loop order below.
matrix grid = J(11, 1, .)
matrix colnames grid = avg_rmse
matrix rownames grid = "-2" "-1" "-0.8" "-0.5" "-0.2" "0" "0.2" "0.5" "0.8" "1" "2"

capture drop train valid yhat se
local row = 0
local lambda_vars wgrowth_rate_neg2 wgrowth_rate_neg1 wgrowth_rate_neg0_8 wgrowth_rate_neg0_5 wgrowth_rate_neg0_2 wgrowth_rate_0 wgrowth_rate_0_2 wgrowth_rate_0_5 wgrowth_rate_0_8 wgrowth_rate_1 wgrowth_rate_2
foreach x of local lambda_vars {
	local ++row
	local rmse_sum = 0

	forvalues k = 1/5 {

		// Hold out fold k; observations with missing fold get train==1, valid==0.
		generate train = fold != `k'
		generate valid = fold == `k'

		quietly reghdfe trust_fed_govt_med `x' if train==1 & kfold_sample==1, ///
			absorb(canton $basic $design $econ $polit idpers) cluster(idhous yob)

		// Prediction uses only the slope on the growth variable.
		generate yhat = _b[`x'] * `x' if valid==1 & kfold_sample==1
		replace yhat = . if train==1 & kfold_sample==1

		// Squared error on the held-out fold, then its root mean.
		generate se = (trust_fed_govt_med - yhat)^2 if valid==1 & kfold_sample==1
		quietly summarize se if valid==1 & kfold_sample==1
		local rmse = sqrt(r(mean))
		local rmse_sum = `rmse_sum' + `rmse'

		drop train valid yhat se

		display "Fold `k'/5 of `x' done"

	}

	// Average the five fold-level RMSEs and store them in the grid row for this variable.
	local avg_rmse = `rmse_sum'/5
	matrix grid[`row', 1] = `avg_rmse'

}

matlist grid

// Step 5 - export the grid as a one-column CSV without disturbing the data in memory.
preserve
	svmat double grid, names(col)
	drop if avg_rmse==.
	keep avg_rmse
	export delimited using "table_f1_column_5.csv", replace
restore

//--------------------------------------------------------
//Create Table F1 (Column 6)
//--------------------------------------------------------

// Column 6: same 5-fold procedure as Column 5, absorbing idhous in place of idpers.

// Step 1 - baseline fit; its e(sample) fixes the observations used in every fold.
capture drop kfold_sample ccid fold
quietly reghdfe trust_fed_govt_med wgrowth_rate_1 if sample==1, ///
	absorb(canton $basic $design $econ $polit idhous) cluster(idhous yob)
generate kfold_sample = e(sample)
egen ccid = group(yob)

// Step 2 - one uniform draw per ccid in the estimation sample, cut into 5 groups.
preserve
	keep if kfold_sample==1
	duplicates drop ccid, force
	set seed 123456
	generate double u = runiform()
	egen fold = cut(u), group(5)
	// cut(), group() numbers the groups from 0; shift to 1..5.
	replace fold = fold + 1
	keep ccid fold
	tempfile folds
	save `folds', replace
restore

// Step 3 - attach the fold id to every observation; blank it outside the estimation sample.
merge m:1 ccid using `folds', nogen
replace fold = . if kfold_sample!=1

// Step 4 - result container: one row per grid value, filled in the loop order below.
matrix grid = J(11, 1, .)
matrix colnames grid = avg_rmse
matrix rownames grid = "-2" "-1" "-0.8" "-0.5" "-0.2" "0" "0.2" "0.5" "0.8" "1" "2"

capture drop train valid yhat se
local row = 0
local lambda_vars wgrowth_rate_neg2 wgrowth_rate_neg1 wgrowth_rate_neg0_8 wgrowth_rate_neg0_5 wgrowth_rate_neg0_2 wgrowth_rate_0 wgrowth_rate_0_2 wgrowth_rate_0_5 wgrowth_rate_0_8 wgrowth_rate_1 wgrowth_rate_2
foreach x of local lambda_vars {
	local ++row
	local rmse_sum = 0

	forvalues k = 1/5 {

		// Hold out fold k; observations with missing fold get train==1, valid==0.
		generate train = fold != `k'
		generate valid = fold == `k'

		quietly reghdfe trust_fed_govt_med `x' if train==1 & kfold_sample==1, ///
			absorb(canton $basic $design $econ $polit idhous) cluster(idhous yob)

		// Prediction uses only the slope on the growth variable.
		generate yhat = _b[`x'] * `x' if valid==1 & kfold_sample==1
		replace yhat = . if train==1 & kfold_sample==1

		// Squared error on the held-out fold, then its root mean.
		generate se = (trust_fed_govt_med - yhat)^2 if valid==1 & kfold_sample==1
		quietly summarize se if valid==1 & kfold_sample==1
		local rmse = sqrt(r(mean))
		local rmse_sum = `rmse_sum' + `rmse'

		drop train valid yhat se

		display "Fold `k'/5 of `x' done"

	}

	// Average the five fold-level RMSEs and store them in the grid row for this variable.
	local avg_rmse = `rmse_sum'/5
	matrix grid[`row', 1] = `avg_rmse'

}

matlist grid

// Step 5 - export. Not wrapped in preserve/restore: after this the data in memory
// hold only the avg_rmse column.
svmat double grid, names(col)
drop if avg_rmse==.
keep avg_rmse
export delimited using "table_f1_column_6.csv", replace

